import argparse
import json
import os
import cv2
import numpy as np
from tqdm import tqdm
import base64
import os, json
import sys
# Make the directory one level above this file's own directory importable.
# NOTE(review): presumably this is what lets the `qd` import below resolve —
# confirm against the repository layout.
llava_path = os.path.dirname(os.path.dirname(__file__))
if llava_path not in sys.path:
    sys.path.append(llava_path)
    # Printed only when the path was actually appended on this run.
    print(sys.path)
from qd.tsv_io import TSVSplitProperty


def img_from_base64(imagestring):
    """Decode a base64-encoded image file into an OpenCV image array.

    Args:
        imagestring: Base64 text (``str`` or bytes-like) holding the encoded
            bytes of an image file.

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)``, or ``None``
        when the input is not valid base64, decodes to zero bytes, or
        ``cv2.imdecode`` itself yields ``None``.
    """
    try:
        raw_bytes = base64.b64decode(imagestring)
        if not raw_bytes:
            # Nothing to decode. Bail out so an empty buffer never reaches
            # cv2.imdecode: recent OpenCV builds reject empty input with a
            # cv2.error instead of returning None.
            return None
        encoded = np.frombuffer(raw_bytes, np.uint8)
        return cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    except ValueError:
        # binascii.Error (bad padding / illegal characters) is a ValueError.
        return None

def get_image(img_tsv, idx):
    """Return the key and decoded image for row ``idx`` of ``img_tsv``.

    The row's first field is used as the key and its last field is handed to
    ``img_from_base64``; the decoded result (possibly ``None``) is returned
    alongside the key as a ``(key, image)`` tuple.
    """
    record = img_tsv[idx]
    return record[0], img_from_base64(record[-1])

def get_instruct(caption_tsv, idx):
    """Return ``(key, question, answer)`` for row ``idx`` of ``caption_tsv``.

    The row's first field is the key. Its last field is parsed as JSON, and
    only the first element of the parsed sequence is consulted for its
    ``'question'`` and ``'answer'`` entries.
    """
    record = caption_tsv[idx]
    sample_key = record[0]
    first_entry = json.loads(record[-1])[0]
    return sample_key, first_entry['question'], first_entry['answer']


def cli(argv=None):
    """Parse the command-line options and create the image output directory.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, so calling
            ``cli()`` behaves exactly like a plain command-line invocation.

    Returns:
        The parsed ``argparse.Namespace`` with ``input_tsv``, ``output_dir``
        and ``split`` attributes.

    Side effects:
        Creates ``<output_dir>/<input_tsv>/image`` if it does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_tsv', type=str, default='TaxCocoVizTextCapFilterV3GPTVInstruct0627',
                        help='dataset name; also used as the output sub-directory')
    parser.add_argument('--output_dir', type=str, default='/datadrive_d/keli/vl-instruct/data',
                        help='root directory under which the export is written')
    parser.add_argument('--split', type=str, default='train',
                        help='dataset split to read; also names the output JSON file')

    args = parser.parse_args(argv)
    # Created up front so later image writes have a directory to land in.
    output_image_dir = '{}/{}/image'.format(args.output_dir, args.input_tsv)
    os.makedirs(output_image_dir, exist_ok=True)
    return args


def main():
    """Export one split of a TSV dataset to JPEG files plus a caption JSON.

    For every row pair (image row, caption row) this:
      * writes the decoded image to ``<output_dir>/<input_tsv>/image/<key>.jpg``
        unless that file already exists, and
      * appends one ``{'image_id': key, 'caption': ...}`` record per entry of
        the caption row's JSON list.

    The records are finally dumped as ``{'annotations': [...]}`` to
    ``<output_dir>/<input_tsv>/<split>.json``.

    Samples whose image cannot be decoded or written are skipped (with a
    warning) so that no annotation refers to a missing image file.

    Raises:
        ValueError: if the image and caption TSVs differ in length, or if a
            row pair does not share the same key.
    """
    args = cli()
    tsv_filename = args.input_tsv
    img_tsv = TSVSplitProperty(data=tsv_filename, split=args.split)
    caption_tsv = TSVSplitProperty(tsv_filename, args.split, 'caption')

    num_rows = len(img_tsv)
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if num_rows != len(caption_tsv):
        raise ValueError(
            'image TSV has {} rows but caption TSV has {}'.format(num_rows, len(caption_tsv))
        )

    dataset_dir = '{}/{}'.format(args.output_dir, args.input_tsv)
    instruct_list = []
    num_skipped = 0
    # `zip` has no length, so pass the total for a meaningful progress bar.
    for img_sample, instruct_sample in tqdm(zip(img_tsv, caption_tsv), total=num_rows):
        data_key = img_sample[0]
        if data_key != instruct_sample[0]:
            raise ValueError(
                'key mismatch between image row {!r} and caption row {!r}'.format(
                    data_key, instruct_sample[0])
            )

        img_fname = '{}/image/{}.jpg'.format(dataset_dir, data_key)
        if not os.path.exists(img_fname):
            cv2_im = img_from_base64(img_sample[-1])
            # img_from_base64 returns None for undecodable data, and
            # cv2.imwrite reports failure through its return value.
            if cv2_im is None or not cv2.imwrite(img_fname, cv2_im):
                # tqdm.write keeps the message from garbling the progress bar.
                tqdm.write('warning: could not export image for key {!r}; skipping sample'.format(data_key))
                num_skipped += 1
                continue

        instruct_list.extend(
            {'image_id': data_key, 'caption': entry['caption']}
            for entry in json.loads(instruct_sample[-1])
        )

    output_json = {'annotations': instruct_list}

    print('total instruct-answer pairs:', len(instruct_list))
    if num_skipped:
        print('skipped samples (image not exported):', num_skipped)
    json_fname = '{}/{}.json'.format(dataset_dir, args.split)
    with open(json_fname, 'w') as f:
        json.dump(output_json, f)

# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
